# script for creating files with fuel calculations and local pollution marginal damages
# saves two files, one at track level, one at downsampled point level
    # 1) Down samples points along tracks - calculates values for fuel calculation
    # 2) Down samples further - determines marginal damage at each downsampled point
    # 3) Aggregates to track level (without local pollution) and saves as CSV
    # 4) Saves points as CSV and layer 
    # 5) Uses original downsampled data to create speed bins

# Imports #####################################################################
# for use with anaconda
try:
    import archook #The module which locates arcgis
    archook.get_arcpy()
    import arcpy
except ImportError:
    error
    # do whatever you do if arcpy isnt there.

# IMPORTS
import numpy as np
#import numpy.lib.recfunctions as rf
import datetime
#import time
import os
import pandas
#import pytz
import time
#import os

import dist_speed_functions as dist_funs

arcpy.env.overwriteOutput=True
#numpy.set_printoptions(threshold=numpy.nan)

###############################################################################
# Options

### Output Locations
# v2 - generates 4/2020 results - interpolated tracks seperated
# v2a - includes flag for interpolated points and measure of na eca

#### Input Locations ##### 
tracks_file = r"H:\My Drive\Boats\ReplicationCode\data\AIS\voyages.gdb\wc%s" 
tracks_where = "" # 
out_tags = ["_2009" ,"_2010","_2011","_2012","_2013","_2014","_2015","_2016"] # list of years to process

## Interpolated Track Times
interp_times = r"H:\My Drive\Boats\ReplicationCode\data\AIS\interp_times\interp_times.csv"

## Boundary Files
CAeca2009_file = r"H:\My Drive\Boats\ReplicationCode\data\boundaries\CAeca_200907.shp"
CAeca2011_file = r"H:\My Drive\Boats\ReplicationCode\data\boundaries\CAeca_201112.shp"
NAbounds_file = r"H:\My Drive\Boats\ReplicationCode\data\boundaries\NorthAmericaBounds.shp"
StudyAreabounds_file = r"H:\My Drive\Boats\ReplicationCode\data\boundaries\StudyAreaBounds.shp"
NAECA_file = r"H:\My Drive\Boats\ReplicationCode\data\boundaries\NA_ECA_polygon.shp"

# Damage grids
# AP2 + distance measures 
#Dgrid_file = r"C:\Users\rklotz\Documents\data\apeep_maps.gdb\offshore_grid_D"
Dgrid_file_proj = r"H:\My Drive\Boats\ReplicationCode\data\damage_grids\apeep_dist_pop.shp"

# InMAP
Dgrid_InMap_file_proj = r"H:\My Drive\Boats\ReplicationCode\data\damage_grids\inmap.shp"  # this is updated version

#### OPTIONS #####
# downsampling time for fuel calculations
fuel_ds = '10min' # intervals in fuel calcs
apeep_ds = '30min' # intervals for local pollution damages and apeep and for final output 
                    # need something here fairly fine to pick up speed profile changes

max_kmh = 70 # maximum km/hr - smoothed if above

# densifying distance
dense_dist = "40 kilometers" # km to densify at
tracks_dense_folder = r"H:\My Drive\Boats\ReplicationCode\data\AIS\voyages_dense.gdb"
tracks_dense_file = r"\wc_dense%s"

# variables to include from tracks file
tracks_var_list = ['OID@','MMSI','IMO','PORT1','PORT2','TIME1','TIME2','RIGHT1','RIGHT2','VesselType','Length', \
            'Width','PORT_ID','DIST','DIST_ECA2009','DIST_ECA2011','TIME_ECA2009','TIME_ECA2011', \
            'TrackStartTime','TRACK_ID','INTERP_FLAG','INTERP_TRACKS'] 

pnts_char_list = ['MMSI','IMO'] # 'PORT1','PORT2','TIME1','TIME2','RIGHT1','RIGHT2','VesselType','Length','Width']
pnts_var_list = ['OID@','SHAPE@X','SHAPE@Y','SHAPE@M'] + pnts_char_list



### Output Locations ############################
# location for processed downsampled points layer
#ds_pnts_folder = r"H:\My Drive\Boats\ReplicationCode\data\AIS\voyage_pts_wFuel_damages.gdb"
#ds_pnts_file = r"\track_pnts%s"

# location for downsampled points csv
ds_csv_file=r'H:\My Drive\Boats\ReplicationCode\data\AIS\voyages\points_ds%s.csv'

# location for tracks csvs (don't have air pollution)
track_csv_file=r'H:\My Drive\Boats\ReplicationCode\data\AIS\voyages\voyages%s.csv'

# location for distance by speed bins
speed_bins_csv_file=r'H:\My Drive\Boats\ReplicationCode\data\AIS\voyages\speed_bins%s.csv'
#################################################


spatial_reference = arcpy.SpatialReference(4326) # GCS_WGS_1984
spatial_reference_proj = arcpy.SpatialReference(102010) # GCS_WGS_1984


###############################################################################
# reproject damages grid 
#print("Projecting Damages Grid")
#arcpy.Project_management(Dgrid_file, Dgrid_file_proj, spatial_reference)

# load the ECA boundaries
#print("Load ECA Boundaries")
#CAeca2009_geom = arcpy.CopyFeatures_management(CAeca2009_file,arcpy.Geometry())[0]
#CAeca2011_geom = arcpy.CopyFeatures_management(CAeca2011_file,arcpy.Geometry())[0]

# Create output gdb if needed
# if not arcpy.Exists(ds_pnts_folder) == True :
#     print "Create Output GDB"
#     #print os.path.dirname(out_gdb) , os.path.basename(out_gdb)
#     arcpy.CreateFileGDB_management( os.path.dirname(ds_pnts_folder) , os.path.basename(ds_pnts_folder) )

if not arcpy.Exists(tracks_dense_folder) == True :
    print "Create Output GDB"
    #print os.path.dirname(out_gdb) , os.path.basename(out_gdb)
    arcpy.CreateFileGDB_management( os.path.dirname(tracks_dense_folder) , os.path.basename(tracks_dense_folder) )

# make folder for output
if not os.path.exists(os.path.split(track_csv_file)[0]):
    os.makedirs(os.path.split(track_csv_file)[0])   
   
# loading interpolated times
df_interp_times_raw = pandas.read_csv(interp_times)    
df_interp_times_raw.TrackStartTime = pandas.to_datetime(df_interp_times_raw.TrackStartTime , utc=True) # converting to datetime
df_interp_times_raw.InterpStartTime = pandas.to_datetime(df_interp_times_raw.InterpStartTime , utc=True) # converting to datetime
df_interp_times_raw.InterpEndTime = pandas.to_datetime(df_interp_times_raw.InterpEndTime , utc=True) # converting to datetime
df_interp_times_raw.TrackStartTime = df_interp_times_raw.TrackStartTime.dt.round('min')

# merging CA ECA boundaries
print("Merging ECA and Other Boundaries")
eca_na_fc = "in_memory/eca_fc"
arcpy.Merge_management([CAeca2009_file, CAeca2011_file, NAbounds_file, StudyAreabounds_file , NAECA_file ] , eca_na_fc)


# Process (looping through all year tags)
for out_tag in out_tags :
    print(out_tag)
    start_time = time.time()

    print("Load points and densifying")
    # this fills in interpolated tracks
    tracks_dense = r"in_memory/tracks_dense"
    arcpy.GeodeticDensify_management (tracks_file % out_tag, tracks_dense, "GEODESIC", dense_dist)
     
    # saving 
    if arcpy.Exists( tracks_dense_folder + "\\" + tracks_dense_file % out_tag  ):
        arcpy.Delete_management( tracks_dense_folder + "\\" + tracks_dense_file % out_tag )
    arcpy.FeatureClassToFeatureClass_conversion(tracks_dense, tracks_dense_folder, tracks_dense_file % out_tag )
    
    # loading to pandas table (for use later)
    df_tracks_in = pandas.DataFrame( arcpy.da.FeatureClassToNumPyArray(
                                    tracks_dense ,field_names=tracks_var_list, where_clause=tracks_where ,
                                     null_value = -9999, explode_to_points= False,
                                    spatial_reference = spatial_reference ) )
    df_tracks_in = df_tracks_in.rename(columns={'OID@':'OID'})
    if out_tag in ["_2015", "_2016"] :
        df_tracks_in['IMO'] = df_tracks_in["IMO"].str.lstrip("IMO").astype(int) # stripping IMO
    
    # converting to include utc (doesn't change times at all)
    df_tracks_in.TrackStartTime = pandas.to_datetime( df_tracks_in.TrackStartTime , utc=True)
            
    # start timer
    print(time.time() - start_time)
    
    print("Load points to dataframe")
    # load points into dataframe
    df_points = pandas.DataFrame( arcpy.da.FeatureClassToNumPyArray(
                                    tracks_dense ,pnts_var_list,where_clause=tracks_where ,
                                     null_value = -9999, explode_to_points= True,
                                    spatial_reference = spatial_reference ) )
    #arcpy.Delete_management(tracks_dense)
    
    # stripping IMO string from IMO codes
    if out_tag in ["_2015", "_2016"] :
        df_points['IMO'] = df_points["IMO"].str.lstrip("IMO").astype(int)
    #df_points['IMO'] = df_points['IMO'].map( lambda x: x.lstrip('IMO') )
        
    # make time column
    ### here adjusting for dates in M stored as local time (need to convert to UTC)
    hours_added = datetime.timedelta(hours = 5)    
    df_points['time'] = pandas.to_datetime(df_points['SHAPE@M'],unit='s',utc=True) - hours_added  
    df_points = df_points.drop(labels='SHAPE@M' , axis=1)
    df_points = df_points.rename(columns={'SHAPE@X':'X','SHAPE@Y':'Y','OID@':'OID'})
       
    # down sampling (taking first point)
    df_points['date_ds'] = df_points['time'].dt.floor(fuel_ds) # 'H' 
    df_points_ds = df_points.groupby(by=['OID','date_ds'],as_index=False).first()
    
    # add last points of track
    df_points_ds = df_points_ds.append( df_points.groupby(by=['OID'],as_index=False).last() , sort=True )
    df_points_ds = df_points_ds.drop_duplicates(subset=['OID','time'],keep='first') # dropping duplicates (last point already included)
    df_points_ds = df_points_ds.sort_values(by=['OID','time'])
    
    
    ### ISSUE HERE, start end times don't line up with timestamps of points on tracks
    ### TIME1 and TIME2 are in local time, while timestamps in UTC
    ### corrected after 8/10/2020
    
    #df_points.to_csv('track_points.csv')
    del df_points
    print(time.time() - start_time)

    ### determining which points are interpolated
    print('Finding Interpolated Points')
    # add OID TO df_interp_times
    # merging all interpolated times
    # keeping only those that lie (temporally) within a track
    # rounding TrackStartTime to minute
    df_tracks_in.TrackStartTime = df_tracks_in.TrackStartTime.dt.round('min')
    df_interp_times = df_interp_times_raw.merge( df_tracks_in[['MMSI','TrackStartTime','TIME1','TIME2','INTERP_FLAG','OID']] , on=['MMSI','TrackStartTime','INTERP_FLAG'] , how='outer' )
    df_interp_times = df_interp_times.loc[df_interp_times.INTERP_TRACK.notna()] # gets rid of extra tracks from outer join
    df_interp_times.TIME1 = pandas.to_datetime(df_interp_times.TIME1 , utc=True) # converting to datetime
    df_interp_times.TIME2 = pandas.to_datetime(df_interp_times.TIME2 , utc=True) # converting to datetime
    df_interp_times['interp_in_track'] = (df_interp_times.TIME1<df_interp_times.InterpStartTime) & (df_interp_times.InterpEndTime<df_interp_times.TIME2)
    df_interp_times = df_interp_times.loc[df_interp_times.interp_in_track] # keep only those for which interpolation is within track
    
    # get index of matches for each oid
    df_interp_times['interp_num'] = df_interp_times.groupby(by='OID')[['OID']].cumcount() + 1
    
    # loop through all sets of matches
    # so accounts for multiple interpolations on same track
    df_points_ds['interp_pt'] = 0 
    for i in df_interp_times.interp_num.unique():
        df_interp_tmp = df_interp_times[df_interp_times.interp_num==i]
        df_points_ds = df_points_ds.merge( df_interp_tmp[['OID','InterpStartTime','InterpEndTime']] , on='OID' , how='left' )
        df_points_ds['interp_pt'] = np.where( (df_points_ds.interp_pt==1) | ( (df_points_ds.date_ds > df_points_ds.InterpStartTime) & (df_points_ds.date_ds < df_points_ds.InterpEndTime) ) , 1 , 0 )
        df_points_ds = df_points_ds.drop(columns=['InterpStartTime','InterpEndTime'])
   
    # check if points within CA ECAs
    print('Calculating distance and in ECA')
    df_points_ds = dist_funs.checkECA_NA(df_points_ds,eca_na_fc,spatial_reference)
    df_points_ds = dist_funs.calcDist(df_points_ds,spatial_reference,spatial_reference_proj)   
    print(time.time() - start_time)
              
#    print('Iterating Through Table')
#    ### Now iterate through table and
#        #1) check if each point in ECA
#        #2) calculate distance and time
#        
#    # doing all iteration calculations at once
#    df_points_ds[['Xprev','Yprev']] = df_points_ds.groupby(['OID'])[['X','Y']].shift(1)
#    df_points_ds['d_km_geo'] = df_points_ds.apply(lambda row :
#                                       getDist( row['X'], row['Y'] ,row['Xprev'], row['Yprev'] , spatial_reference 
#                                            ) , axis=1 , result_type='expand' , raw=True )
#    df_points_ds = df_points_ds.drop(labels=['Xprev','Yprev'],axis=1)
  
    
    # calculate time on each segment (first point is 0)
    df_points_ds = df_points_ds.sort_values(by=['OID','time'])
    df_points_ds['d_time_hr'] = ( df_points_ds.groupby('OID')['time'].diff().dt.total_seconds()/3600 ).fillna(0)
    
    # calculate speed
    df_points_ds['kmh'] = df_points_ds['d_km'] / df_points_ds['d_time_hr']
    
    # smooth speed
    df_points_ds.loc[df_points_ds.kmh>max_kmh,'kmh'] = np.nan # set really fast values to nan
    # the reset index here only works if grouping on single vessel 
    # doing a 3 segment interval centered at current pointm and requires 1 observation
    df_points_ds['kmh_rolling'] = df_points_ds['kmh'].groupby(df_points_ds['OID']).rolling(3,center=False,min_periods=1).mean().reset_index(0,drop=True)
    df_points_ds['kmh'] = np.where(df_points_ds.kmh.isnull, df_points_ds.kmh_rolling , df_points_ds.kmh)    
    
    # calculate time weighted speed^3
    df_points_ds['s3_dt'] = df_points_ds['kmh']**3 * df_points_ds['d_time_hr']
    df_points_ds['s3_dt_eca09'] = df_points_ds['s3_dt'] * df_points_ds['in_eca09']
    df_points_ds['s3_dt_eca11'] = df_points_ds['s3_dt'] * df_points_ds['in_eca11']
    df_points_ds['s3_dt_naeca'] = df_points_ds['s3_dt'] * df_points_ds['in_naeca']
    df_points_ds['s3_dt_notinterp'] = df_points_ds['s3_dt'] * ( 1 -  df_points_ds['interp_pt'] ) # fuel consumption for uninterpolated portion of track
    df_points_ds['d_time_notinterp'] = df_points_ds['d_time_hr'] * ( 1 -  df_points_ds['interp_pt'] ) # fuel consumption for uninterpolated portion of track
    
    df_points_ds['km_onland'] = df_points_ds['d_km'] * df_points_ds['onland']
    df_points_ds['km_studyarea'] = df_points_ds['d_km'] * df_points_ds['studyarea']
    df_points_ds['km_naeca'] = df_points_ds['d_km'] * df_points_ds['in_naeca']
    df_points_ds['km_interp'] = df_points_ds['d_km'] * df_points_ds['interp_pt']
    df_points_ds['km_interp_naeca'] = df_points_ds['d_km'] * df_points_ds['in_naeca'] * df_points_ds['interp_pt'] # interpolated distance inside na eca
       
    # now calculating distance outside na eca
    # getting values both within 160km of NA ECA and well outside
    # need this for applying speed changes   
    df_points_ds = df_points_ds.sort_values(by=['OID','time'],ascending=[True,True]) 
    df_points_ds['km_cum'] = df_points_ds.groupby(['OID'])['d_km'].cumsum()

    df_points_ds['in_naeca_prev'] = df_points_ds.groupby('OID')['in_naeca'].shift()
    df_points_ds['naeca_cross'] = df_points_ds.in_naeca.ne(df_points_ds.in_naeca_prev) & df_points_ds.in_naeca_prev.notna()
    #df_points_ds['naeca_cross_pt'] = df_points_ds.pnt_ID * df_points_ds.naeca_cross

    # frame of all cross points
    df_points_cross = df_points_ds[df_points_ds.naeca_cross==1][['OID','km_cum']]  
    df_points_cross = df_points_cross.groupby(by="OID",as_index=False).km_cum.agg(['max','min','count']).add_suffix('_km_cum_naeca') 
    df_points_cross = df_points_cross.rename(columns={'count_km_cum_naeca':'total_crosses_naeca'})
    
    # get distance from first cross (only outside ECA)
    df_points_ds = df_points_ds.merge(df_points_cross,on='OID',how='left')
    del df_points_cross
    df_points_ds['km_out_naeca'] = np.where( df_points_ds.in_naeca==0 , abs( df_points_ds.km_cum - df_points_ds.max_km_cum_naeca) , np.nan) 
    
    # for calculating total interpolated distance within 160km of NA ECA and beyound 160km of NAECA
    df_points_ds['km_interp_naeca_less160'] = ( df_points_ds.km_out_naeca<160 ) * df_points_ds.d_km * df_points_ds['interp_pt'] 
    df_points_ds['km_interp_naeca_more160'] = ( df_points_ds.km_out_naeca>=160 ) * df_points_ds.d_km * df_points_ds['interp_pt'] 
    
    # calculate average speed unaffected by NA ECA or CA ECA
    # calling this "cruising" speed which would also be relevant for NA ECA
    # doing 160km outside of CA ECA (2011 bounds), in NA ECA, not interpolated
    df_points_ds['in_eca11_prev'] = df_points_ds.groupby('OID')['in_eca11'].shift()
    df_points_ds['eca11_cross'] = df_points_ds.in_eca11.ne(df_points_ds.in_eca11_prev) & df_points_ds.in_eca11_prev.notna()
    
    df_points_cross = df_points_ds[df_points_ds.eca11_cross==1][['OID','km_cum']]  
    df_points_cross = df_points_cross.groupby(by="OID",as_index=False).km_cum.agg(['max','min','count']).add_suffix('_km_cum_eca11') 
    df_points_cross = df_points_cross.rename(columns={'count_km_cum_eca11':'total_crosses_eca11'})
    
    df_points_ds = df_points_ds.merge(df_points_cross,on='OID',how='left')
    del df_points_cross
    df_points_ds['km_out_eca11'] = np.where( df_points_ds.in_eca11==0 , abs( df_points_ds.km_cum - df_points_ds.max_km_cum_eca11) , np.nan) 
        
    df_points_ds['cruise'] = (df_points_ds.km_out_eca11>=160) & (df_points_ds.in_naeca) & (1-df_points_ds.interp_pt)
    df_points_ds['km_cruise'] = df_points_ds.d_km * df_points_ds.cruise
    df_points_ds['hr_cruise']  = df_points_ds.d_time_hr * df_points_ds.cruise 
        
    print(time.time() - start_time) 
            
    ### CALCULATING TRACK AGGREGATES BASED ON FINER DATA
    print('Aggregating some vars and saving to tracks file')
    # collapse downsample points to get fuel consumption measures by track
    # merge this against unexploded track table
    df_s3 = df_points_ds[['OID','s3_dt','s3_dt_eca09','s3_dt_eca11','s3_dt_naeca' , 's3_dt_notinterp' , 'd_time_notinterp' \
                               ,'km_onland','km_naeca','km_studyarea','km_interp','km_interp_naeca','km_interp_naeca_less160','km_interp_naeca_more160' \
                               ,'km_cruise','hr_cruise']].groupby(['OID'],as_index=False ).sum()
    df_s3['naeca_interp'] = np.where(df_s3.km_interp_naeca==0,0,1) #### determining if track is interpolated within NA ECA
    df_s3['kmh_cruise'] = df_s3.km_cruise / df_s3.hr_cruise
    df_s3 = df_s3.drop(columns=['km_cruise','hr_cruise'])

    #### determining if track is interpolated within NA ECA (OLD)
    #df_naeca = df_points_ds[df_points_ds.in_naeca==1].groupby(by='OID',as_index=False)[['interp_pt']].mean()
    #df_naeca = df_naeca.rename(columns={'interp_pt':'naeca_interp'})
    #df_s3 = df_s3.merge(df_naeca,how='left',on='OID')
        
    # merging in the total time weighted speed^3 values
    df_tracks = df_tracks_in.merge(df_s3,how='left',on='OID')
    
    print('Saving tracks' )
    # these are used to collect mmsi--imo mapping and for some track totals (km_studyarea, naeca measures, interpolated distance)
    df_tracks.to_csv( track_csv_file % out_tag ,index_label='ID_track')
    print(time.time() - start_time)
    
   
    print('Calculating Speed Bins' )
    # discretize speed, calculate distance in speed bins
    # then save
    df_points_ds.kmh = df_points_ds.kmh.fillna(method='backfill',limit=1) # filling na speeds (first point) with next speed
    kmh_bins = list(range(0,16,8)) + list(range(16,48,2))
    df_points_ds['kmh_b'] = pandas.cut(df_points_ds.kmh, kmh_bins + [10000] , labels=kmh_bins)  
    df_points_ds.d_km = df_points_ds.d_km.fillna(0)
    
    df_points_ds['d_km_eca09'] = df_points_ds.d_km * df_points_ds.in_eca09
    df_points_ds['d_km_eca11'] = df_points_ds.d_km * df_points_ds.in_eca11
    df_kmh_bins = df_points_ds[['OID','kmh_b','d_km','d_km_eca09','d_km_eca11'] ].groupby(['OID','kmh_b'],as_index=False).sum().fillna(0)
       
     # adding track level information
    df_kmh_bins = df_kmh_bins.merge(df_tracks,on="OID",how='left')
    
    print('Saving Speed Bins')
    df_kmh_bins.to_csv( speed_bins_csv_file % out_tag ,index_label='ID_track')
    print(time.time() - start_time)
    
    print('Down sampling for Marginal Damages') # also selects only necessary variables
    df_points_ds['date_ds_apeep'] = df_points_ds['time'].dt.floor(apeep_ds) # 'H'
    df_points_apeep = df_points_ds[['OID','date_ds_apeep','s3_dt','s3_dt_eca09','s3_dt_eca11','s3_dt_naeca','d_km','d_time_hr','km_onland','km_interp']].groupby(by=['OID','date_ds_apeep'],as_index=False).sum()
    # adding X Y coordinates
    df_XY_apeep = df_points_ds[['OID','date_ds_apeep','X','Y']].groupby(by=['OID','date_ds_apeep'],as_index=False).first()
    df_points_apeep = df_points_apeep.merge(df_XY_apeep,on=['OID','date_ds_apeep'])
    
    df_points_apeep['kmh'] = df_points_apeep['d_km']/df_points_apeep['d_time_hr'] # calculating speed
    
    # calculating indicators that is fraction of total power required
    df_points_apeep['in_eca09'] = df_points_apeep['s3_dt_eca09']/df_points_apeep['s3_dt']
    df_points_apeep['in_eca11'] = df_points_apeep['s3_dt_eca11']/df_points_apeep['s3_dt']
    df_points_apeep['in_naeca'] = df_points_apeep['s3_dt_naeca']/df_points_apeep['s3_dt']
    df_points_apeep['onland'] = df_points_apeep['km_onland']/df_points_apeep['d_km']
    df_points_apeep['interp'] = df_points_apeep['km_interp']/df_points_apeep['d_km']
    df_points_apeep = df_points_apeep.drop(['km_interp'], axis=1)
    
    # cumulative distance
    df_points_apeep['km_cum'] = df_points_apeep.groupby(['OID'])['d_km'].cumsum()
    
    # merge in other characteristics   
    df_points_apeep = df_points_apeep.merge(df_tracks_in,how='left',on='OID')
   
    # saving downsampled points as feature class
    df_points_apeep['pnt_ID'] = df_points_apeep.index
    df_points_apeep['month1'] = df_points_apeep.TIME1.dt.month
    df_points_apeep['day1'] = df_points_apeep.TIME1.dt.day
    fc_ds_points_apeep = 'in_memory/ds_points'
    # list of variables to add to downsample points layer
    vars_for_pnts_lyr = ['pnt_ID','OID','X','Y','MMSI','IMO','PORT1','PORT2','RIGHT1','RIGHT2'
                         ,'VesselType','Length','Width','month1','day1','onland' ,'s3_dt' , 'interp' , 'in_naeca' , 'in_eca09', 'in_eca11' ]
    arcpy.da.NumPyArrayToFeatureClass (df_points_apeep[ vars_for_pnts_lyr ].to_records(index=False), 
                                           fc_ds_points_apeep , ('X','Y'), spatial_reference=spatial_reference)
    
    print("Joining damages")
    fc_ds_points_tmp = 'in_memory/ds_points_merged_tmp' 
    fc_ds_points_merged = 'in_memory/ds_points_merged' 
    # AP2
    arcpy.SpatialJoin_analysis(fc_ds_points_apeep, Dgrid_file_proj, fc_ds_points_tmp, join_operation='JOIN_ONE_TO_ONE' 
                               , join_type='KEEP_ALL' , match_option='WITHIN' )
    # InMAP
    arcpy.SpatialJoin_analysis(fc_ds_points_tmp, Dgrid_InMap_file_proj, fc_ds_points_merged, join_operation='JOIN_ONE_TO_ONE' 
                               , join_type='KEEP_ALL' , match_option='WITHIN' )
    arcpy.Delete_management(fc_ds_points_apeep)
    arcpy.Delete_management(fc_ds_points_tmp)   
    
    # now merging damages into table and saving
    df_points_D = pandas.DataFrame( arcpy.da.FeatureClassToNumPyArray(
                                    fc_ds_points_merged,field_names=['pnt_ID','GRID_ID','kmpop_us','kmpop_ca','kmpop_la','kmpop_lasf' \
                                    ,'distUScoas','D_PM','D_SO2_SO4','D_PM_inm','D_SO2_inm','D_NOx_inm','D_VOC_inm','D_NH3_inm'], where_clause="",
                                     null_value = -9999, explode_to_points= False,
                                    spatial_reference = spatial_reference ) )
    df_points_apeep = df_points_apeep.merge(df_points_D,on='pnt_ID',how='left')
       
    print("Saving Points CSV")
    # saving as CSV
    df_points_apeep.to_csv( ds_csv_file % out_tag ,index_label='ID_pnt')
    print(time.time() - start_time)
    
    # saving as layer
    #print("Saving Points Layer")
    #arcpy.FeatureClassToFeatureClass_conversion(fc_ds_points_merged, ds_pnts_folder, ds_pnts_file % out_tag )

    del fc_ds_points_merged
    del df_points_ds
    #del df_points
    del df_kmh_bins